library(ggplot2)
library(dplyr)
library(tidyverse)
library(rjson)
library(jsonlite)
library(lubridate)
library(ggbump)
library(plotly)
library(zoo)
# Load the three inputs: my Netflix watch history, the dataset of Netflix
# titles with their attributes, and the weekly top 10 charts by country
netflix <- read.csv(file = "netflix watch history.csv")
netflix_info <- read.csv(file = "netflix_titles_mar23.csv")
netflix_top10_countries <- read_tsv(file = "all-weeks-countries.tsv")
# Netflix Data Cleaning and Merging ----
# For TV shows the season and episode name follow the show name after a colon,
# so keep only the text before the first colon as the title to match on
netflix$Name <- str_split_i(string = netflix$Title, pattern = ":", i = 1)
# First pass: attach the attributes from the Netflix content dataset to
# everything I streamed, matching my extracted name to the dataset's title
netflix_merged <- left_join(netflix, netflix_info, by = c("Name" = "title"))
# Find titles that I've watched but that aren't in the dataset of Netflix
# content I'm merging my data with
# netflix_merged[is.na(netflix_merged$type),]
# ^ returns 58 rows with missing values
# After exploring these rows, map each name as it appears in my watch history
# (left) to the spelling used in the Netflix content dataset (right)
netflix_name_fixes <- c(
  "Doctor (Telugu)" = "Doctor",
  "Love Today (Telugu)" = "Love Today",
  "Fabulous Lives of Bollywood Wives" = "The Fabulous Lives of Bollywood Wives",
  "House of Secrets" = "House of Secrets: The Burari Deaths",
  "F3" = "F3: Fun and Frustration",
  "Ante Sundaraniki" = "Ante... Sundaraniki!",
  "Virata Parvam" = "Viraata Parvam",
  "Thimmarusu" = "Thimmarusu: Assignment Vali",
  "Hey Sinamika (Telugu)" = "Hey! Sinamika",
  "An Astrological Guide for Broken Hearts" = "An astrological guide for broken hearts",
  "DASH & LILY" = "Dash & Lily",
  "Kurup (Telugu)" = "Kurup",
  "Game Over (Telugu Version)" = "Game Over",
  "Taare Zameen Par" = "Like Stars on Earth"
)
# Apply every rename with one vectorised lookup. Assigning into a row subset
# of the data frame (`netflix[cond, ]$Name <- ...`) stops with "replacement has
# 1 row, data has 0" when a listed title is not in the watch history; indexing
# the column directly is simply a no-op in that case. `%in%` also keeps any
# missing Name out of the index.
netflix_names_to_fix <- netflix$Name %in% names(netflix_name_fixes)
netflix$Name[netflix_names_to_fix] <-
  unname(netflix_name_fixes[netflix$Name[netflix_names_to_fix]])
# Second pass: now that the names have been corrected, redo the merge with the
# Netflix content dataset
netflix_merged <- left_join(netflix, netflix_info, by = c("Name" = "title"))
# Keep only the columns that may be relevant to the analysis
netflix_merged <- netflix_merged[, c(
  "Title", "Date", "Name",
  "type", "release_year", "age_certification", "runtime",
  "genres", "production_countries", "seasons",
  "imdb_score", "imdb_votes"
)]
# Look if any more titles are missing the merged attributes
# unique(netflix_merged[is.na(netflix_merged$type), "Name"])
# ^ 11 unique missing titles
# Manually input these attributes if the title has recently been added and
# that's why it's not in the dataset being merged with. If it's no longer on
# Netflix, it is dropped afterwards.
# Each entry is keyed by Name and lists exactly the columns to fill in
# (`seasons` is only given for shows).
netflix_manual_attributes <- list(
  "Chor Nikal Ke Bhaga" = list(
    type = "MOVIE",
    release_year = 2023,
    age_certification = "TV-MA",
    runtime = 110,
    genres = "['thriller', 'drama', 'crime']",
    production_countries = "['IN']",
    imdb_score = 7.9,
    imdb_votes = 43890
  ),
  "Butta Bomma" = list(
    type = "MOVIE",
    release_year = 2023,
    age_certification = "TV-14",
    runtime = 122,
    genres = "['romance', 'drama']",
    production_countries = "['IN']",
    imdb_score = 6.4,
    imdb_votes = 294
  ),
  "Student of the Year" = list(
    type = "MOVIE",
    release_year = 2012,
    age_certification = "TV-14",
    runtime = 146,
    genres = "['comedy', 'drama', 'music', 'sport', 'romance']",
    production_countries = "['IN']",
    imdb_score = 5.2,
    imdb_votes = 18946
  ),
  "Old Enough!" = list(
    type = "SHOW",
    release_year = 1991,
    age_certification = "TV-G",
    runtime = 18,
    genres = "['reality']",
    production_countries = "['JP']",
    seasons = 12,
    imdb_score = 8.2,
    imdb_votes = 1070
  )
)
# Write the attributes into every row of the matching title. Titles that do
# not occur in the watch history are skipped; assigning into an empty row
# subset with `df[cond, ]$col <- value` would stop with an error instead.
for (manual_name in names(netflix_manual_attributes)) {
  manual_rows <- which(netflix_merged$Name == manual_name)
  if (length(manual_rows) == 0) {
    next
  }
  manual_values <- netflix_manual_attributes[[manual_name]]
  for (manual_col in names(manual_values)) {
    netflix_merged[manual_rows, manual_col] <- manual_values[[manual_col]]
  }
}
# Rows that still have no attributes belong to titles that are no longer on
# Netflix, so drop them
netflix_merged <- netflix_merged[!is.na(netflix_merged$type), ]
# Some viewings were matched to more than one entry of the content dataset.
# Drop the (Title, release_year) combinations that are not the title I watched.
# head(sort(table(netflix_merged$Title), decreasing=TRUE), 10)
netflix_merged <- netflix_merged[!(
  (netflix_merged$Title == "Dhamaka" & netflix_merged$release_year == 2022) |
    (netflix_merged$Title == "Dostana" & netflix_merged$release_year == 1982) |
    (netflix_merged$Title == "Zero: Episode 1" & netflix_merged$release_year == 2018) |
    (netflix_merged$Title == "Zero: Episode 2" & netflix_merged$release_year == 2018)
), ]
# netflix_merged[is.na(netflix_merged$id),]
# ^no more rows with missing attributes
# Reduce the genres column to its first genre: strip the square brackets, keep
# the text before the first comma, then remove the single quotes
netflix_merged$genres <- netflix_merged$genres %>%
  gsub("\\[|\\]", "", .) %>%
  str_split_i(",", 1) %>%
  gsub("'", "", .)
# Same treatment for production_countries: keep only the first country
netflix_merged$production_countries <- netflix_merged$production_countries %>%
  gsub("\\[|\\]", "", .) %>%
  str_split_i(",", 1) %>%
  gsub("'", "", .)
# Parse the Date column (month/day/two-digit year) into R's Date type
netflix_merged$Date <- as.Date(netflix_merged$Date, format = "%m/%d/%y")
# Only include data from June 28th, 2021 or later, because that is the first
# date for which I have the weekly top 10 Netflix charts
netflix_merged <- netflix_merged[netflix_merged$Date >= as.Date("2021-06-28"), ]
# Get the unique titles of my content (so as to not repeat shows).
# `duplicated()` keeps the first row for each Name, which is the most recent
# viewing as long as the watch history is ordered newest first
# (NOTE(review): confirm the export order)
my_unique_titles <- netflix_merged[!duplicated(netflix_merged$Name), ]
# Get only unique titles that were made in the US or India. `%in%` is used
# instead of `==` so that a missing production country is excluded rather than
# producing an all-NA row in the result.
my_unique_titles_us_in <- my_unique_titles[
  my_unique_titles$production_countries %in% c("US", "IN"),
]
# Convert the "N/A" placeholder in season_title to a real NA. Indexing the
# column with `%in%` keeps the index free of NA, so this also works if some
# season titles are already missing (a row-subset assignment on the tibble
# would reject an NA subscript).
netflix_top10_countries$season_title[
  netflix_top10_countries$season_title %in% "N/A"
] <- NA
# Get content from the top 10 that made the charts in the US or India.
# `%in%` never returns NA, so a missing country code cannot create an all-NA
# row here.
netflix_top10_us_in <- netflix_top10_countries[
  netflix_top10_countries$country_iso2 %in% c("US", "IN"),
]
# Merge with attributes from the Netflix content dataset
netflix_top10_merged_us_in <- netflix_top10_us_in %>%
  left_join(netflix_info, by = c("show_title" = "title"))
# unique(netflix_top10_merged_us_in[is.na(netflix_top10_merged_us_in$id), "show_title"])
# 355 titles- unique missings
# Explore the missings and replace them with the names that match the dataset
# being merged with. `title` starts as a copy of `show_title` (which is kept
# untouched) and is then corrected through a single lookup table mapping the
# chart spelling (left) to the content-dataset spelling (right).
netflix_top10_us_in$title <- netflix_top10_us_in$show_title
top10_title_fixes <- c(
  "RRR (Hindi)" = "RRR",
  "Thunivu (Hindi)" = "Thunivu",
  "Kantara (Hindi)" = "Kantara",
  "Crash Course in Romance" = "Crash Course In Romance",
  "Like An Afternoon Dream" = "Nanpakal Nerathu Mayakkam",
  "DSP (Hindi)" = "DSP",
  "Tegimpu" = "Thunivu",
  "Lockwood & Co." = "Lockwood & Co",
  "Mumbai Mafia: Police vs The Underworld" = "Mumbai Mafia: Police vs the Underworld",
  "Matti Kusthi" = "Gatta Kusthi",
  "Tara VS. Bilal" = "Tara vs Bilal",
  "Love Today (Telugu)" = "Love Today",
  "Codename: Tiranga" = "Code Name: Tiranga",
  "Kalagathalaivan" = "Kalaga Thalaivan",
  "GodFather (Hindi)" = "GodFather",
  "Money Heist: Korea - Joint Economic Area" = "Money Heist (Korean Remake)",
  "Deception - Round D Corner" = "Dhokha: Round D Corner",
  "The Ghost (Hindi)" = "The Ghost",
  "Jamtara - Sabka Number Ayega" = "Jamtara – Sabka Number Ayega",
  "Half Bad: The Bastard Son & The Devil Himself" = "The Bastard Son & the Devil Himself",
  "The Ghost (Tamil)" = "The Ghost",
  "DAHMER" = "Dahmer - Monster: The Jeffrey Dahmer Story",
  "Fabulous Lives of Bollywood Wives" = "The Fabulous Lives of Bollywood Wives",
  "Katteri" = "Kaatteri",
  "Sherdil: The Pilibhit Saga" = "Sherdil",
  "Shabaash Mithu (Hindi)" = "Shabaash Mithu",
  "Foot Fairy (Hindi)" = "Footfairy",
  "Vaashi (Malayalam)" = "Vaashi",
  "Major (Hindi)" = "Major",
  "Ante Sundaraniki" = "Ante... Sundaraniki!",
  "Ante Sundaraniki (Tamil)" = "Ante... Sundaraniki!",
  "Major (Telugu)" = "Major",
  "Virata Parvam" = "Viraata Parvam",
  "Man Vs Bee" = "Man vs. Bee",
  "Raw (Hindi)" = "Beast",
  "Jana 2022 (Telugu)" = "Jana Gana Mana",
  "Radhe Shyam (Hindi)" = "Radhe Shyam",
  "Beast (Telugu)" = "Beast",
  "'83" = "83",
  "Etharkkum Thunindhavan (Hindi)" = "Etharkkum Thunindhavan",
  "Etharkkum Thunindhavan (Telugu)" = "Etharkkum Thunindhavan",
  "Hey Sinamika" = "Hey! Sinamika",
  "Hey Sinamika (Hindi)" = "Hey! Sinamika",
  "Hey Sinamika (Telugu)" = "Hey! Sinamika",
  "Against The Ice" = "Against the Ice",
  "PIECES OF HER" = "Pieces of Her",
  "In From the Cold" = "In from the Cold",
  "Munich – The Edge of War" = "Munich: The Edge of War",
  "Kurup (Hindi)" = "Kurup",
  "STAND BY ME Doraemon 2" = "Stand by Me Doraemon 2",
  "Kurup (Malayalam)" = "Kurup",
  "Kurup (Telugu)" = "Kurup",
  "Kurup (Tamil)" = "Kurup",
  "Annaatthe (Hindi)" = "Annaatthe",
  "Annaatthe (Telugu)" = "Annaatthe",
  "Doctor (Tamil)" = "Doctor",
  "Doctor (Telugu)" = "Doctor",
  "Monster Hunter" = "Monster Hunter: Legends of the Guild",
  "Tughlaq Durbar (Telugu)" = "Tughlaq Durbar",
  "Thimmarusu" = "Thimmarusu: Assignment Vali",
  "Boomika (Hindi)" = "Boomika",
  "The Last Letter From Your Lover" = "The Last Letter from Your Lover",
  "Fear Street Part 3: 1666" = "Fear Street: 1666",
  "Fear Street Part 2: 1978" = "Fear Street: Part Two - 1978",
  "How I Became a Superhero" = "How I Became a Super Hero",
  "Fear Street Part 1: 1994" = "Fear Street: Part One - 1994",
  "Surviving R. Kelly Part III: The Final Chapter" = "Surviving R. Kelly",
  "The Hangover: Part III" = "The Hangover Part III",
  "The Hangover: Part II" = "The Hangover Part II",
  "Tyler Perry's I Can Do Bad All by Myself" = "I Can Do Bad All By Myself",
  "My Lover My Killer" = "My Lover, My Killer",
  "Pamela, a love story" = "Pamela, A Love Story",
  "Roald Dahl's Matilda The Musical" = "Roald Dahl's Matilda the Musical",
  "MADOFF: The Monster of Wall Street" = "Madoff: The Monster of Wall Street",
  "I AM A KILLER" = "I Am a Killer",
  "Guillermo del Toro’s Pinocchio" = "Guillermo del Toro's Pinocchio",
  "Snack VS. Chef" = "Snack vs Chef",
  "I AM A STALKER" = "I Am a Stalker",
  "Minions & More Volume 1" = "Minions & More 1",
  "Selling The OC" = "Selling the OC",
  "Glow Up" = "Glow Up: Britain's Next Make-Up Star",
  "Trainwreck" = "Trainwreck: Woodstock '99",
  "The UnXplained with William Shatner" = "The UnXplained",
  "Bullsh*t The Gameshow" = "Bullsh*t the Game Show",
  "CoComelon" = "Cocomelon",
  "A Madea Homecoming" = "Tyler Perry's A Madea Homecoming",
  "jeen-yuhs: A Kanye Trilogy" = "jeen-yuhs",
  "Peter Rabbit 2" = "Peter Rabbit 2: The Runaway",
  "Twentysomethings: Austin" = "Twenty Somethings: Austin",
  "Single All The Way" = "Single All the Way",
  "Jurassic World Camp Cretaceous" = "Jurassic World: Camp Cretaceous",
  "A Castle For Christmas" = "A Castle for Christmas",
  "Shameless (U.S.)" = "Shameless",
  "Britney Vs Spears" = "Britney vs. Spears",
  "Hunter X Hunter (2011)" = "Hunter x Hunter",
  "The Mitchells vs. The Machines" = "The Mitchells vs. the Machines",
  "Under Suspicion" = "Under Suspicion: Uncovering the Wesphael Case",
  "The Unbroken Voice" = "Canto para no llorar, Arelys Henao",
  "Don't Pick Up the Phone" = "Pervert: Hunting the Strip Search Caller",
  "Newly Rich, Newly Poor" = "Nuevo Rico Nuevo Pobre"
)
# No corrected name is itself a key of the table, so one simultaneous lookup
# gives the same result as applying the replacements one after another.
# Titles without an entry keep their original spelling.
top10_titles_to_fix <- netflix_top10_us_in$title %in% names(top10_title_fixes)
netflix_top10_us_in$title[top10_titles_to_fix] <-
  unname(top10_title_fixes[netflix_top10_us_in$title[top10_titles_to_fix]])
# Redo the merge with the Netflix content dataset, this time matching on the
# corrected `title` column
netflix_top10_merged_us_in <- left_join(
  netflix_top10_us_in,
  netflix_info,
  by = "title"
)
# unique(netflix_top10_merged_us_in[is.na(netflix_top10_merged_us_in$id), "show_title"])
# ^ still 255 unique missing titles but we will try to deal with these below
# Manually input titles that are on Netflix but may be new and not already in
# the dataset, or somehow got missed.
# Each entry names the chart `show_title`(s) it applies to and lists exactly
# the columns to fill in (`seasons` is only given for shows).
top10_manual_attributes <- list(
  list(
    show_title = "Faraaz",
    type = "MOVIE",
    release_year = 2023,
    age_certification = "TV-MA",
    runtime = 112,
    genres = "['thriller', 'action']",
    production_countries = "['IN']",
    imdb_score = 4.8,
    imdb_votes = 1140
  ),
  list(
    show_title = "Murder Mystery 2",
    type = "MOVIE",
    release_year = 2023,
    age_certification = "PG-13",
    runtime = 88,
    genres = "['comedy', 'crime', 'romance', 'action']",
    production_countries = "['US']",
    imdb_score = 5.6,
    imdb_votes = 38841
  ),
  list(
    show_title = "Chor Nikal Ke Bhaga",
    type = "MOVIE",
    release_year = 2023,
    age_certification = "TV-MA",
    runtime = 110,
    genres = "['thriller', 'drama', 'crime']",
    production_countries = "['IN']",
    imdb_score = 7.9,
    imdb_votes = 43890
  ),
  list(
    show_title = "Amigos",
    type = "MOVIE",
    release_year = 2023,
    age_certification = "TV-MA",
    runtime = 137,
    genres = "['action', 'thriller', 'crime']",
    production_countries = "['IN']",
    imdb_score = 6,
    imdb_votes = 2440
  ),
  list(
    # The same film charted under several names
    show_title = c("Sir (Hindi)", "Vaathi", "Sir"),
    type = "MOVIE",
    release_year = 2023,
    age_certification = "TV-PG",
    runtime = 137,
    genres = "['drama', 'action', 'comedy', 'romance']",
    production_countries = "['IN']",
    imdb_score = 7.4,
    imdb_votes = 6683
  ),
  list(
    show_title = "Kill Boksoon",
    type = "MOVIE",
    release_year = 2023,
    age_certification = "R",
    runtime = 137,
    genres = "['action', 'thriller']",
    production_countries = "['KR']",
    imdb_score = 6.6,
    imdb_votes = 5887
  ),
  list(
    show_title = "Chupa",
    type = "MOVIE",
    release_year = 2023,
    age_certification = "PG",
    runtime = 95,
    genres = "['fantasy', 'drama', 'family', 'action']",
    production_countries = "['US']",
    imdb_score = 5.6,
    imdb_votes = 3765
  ),
  list(
    show_title = "Almost Pyaar With DJ Mohabbat",
    type = "MOVIE",
    release_year = 2023,
    age_certification = "TV-MA",
    runtime = 119,
    genres = "['drama', 'romance']",
    production_countries = "['IN']",
    imdb_score = 4.8,
    imdb_votes = 769
  ),
  list(
    show_title = "The Night Agent",
    type = "SHOW",
    release_year = 2023,
    age_certification = "TV-MA",
    runtime = 49,
    genres = "['drama', 'thriller', 'action']",
    production_countries = "['US']",
    seasons = 1,
    imdb_score = 7.6,
    imdb_votes = 48665
  ),
  list(
    show_title = "IRL - In Real Love",
    type = "SHOW",
    release_year = 2023,
    age_certification = "TV-MA",
    runtime = 52,
    genres = "['reality', 'romance']",
    production_countries = "['IN']",
    seasons = 1,
    imdb_score = 2.9,
    imdb_votes = 140
  ),
  list(
    show_title = "Raees",
    type = "MOVIE",
    release_year = 2017,
    age_certification = "R",
    runtime = 161,
    genres = "['thriller', 'action', 'crime', 'drama']",
    production_countries = "['IN']",
    imdb_score = 6.6,
    imdb_votes = 47157
  ),
  list(
    show_title = "The Magician's Elephant",
    type = "MOVIE",
    release_year = 2023,
    age_certification = "PG",
    runtime = 99,
    genres = "['animation', 'comedy', 'drama', 'family', 'fantasy', 'action']",
    production_countries = "['US', 'AU']",
    imdb_score = 6.5,
    imdb_votes = 3381
  ),
  list(
    show_title = "Butta Bomma",
    type = "MOVIE",
    release_year = 2023,
    age_certification = "TV-14",
    runtime = 122,
    genres = "['romance', 'drama']",
    production_countries = "['IN']",
    imdb_score = 6.4,
    imdb_votes = 294
  ),
  list(
    show_title = "The Bourne Legacy",
    type = "MOVIE",
    release_year = 2012,
    age_certification = "PG-13",
    runtime = 135,
    genres = "['thriller', 'action']",
    production_countries = "['US']",
    imdb_score = 6.6,
    imdb_votes = 307489
  ),
  list(
    show_title = "Shark Tale",
    type = "MOVIE",
    release_year = 2004,
    age_certification = "PG",
    runtime = 90,
    genres = "['animation', 'action', 'comedy', 'family','thriller','fantasy']",
    production_countries = "['US']",
    imdb_score = 6,
    imdb_votes = 188868
  ),
  list(
    show_title = "Matilda",
    type = "MOVIE",
    release_year = 1996,
    age_certification = "PG",
    runtime = 98,
    genres = "['family', 'fantasy', 'comedy']",
    production_countries = "['US']",
    imdb_score = 7,
    imdb_votes = 162253
  )
)
# Write each entry's attributes into every chart row carrying one of its
# show titles. Entries whose titles do not occur in the charts are skipped,
# and the rows are located once per entry instead of once per attribute.
for (top10_entry in top10_manual_attributes) {
  top10_rows <- which(
    netflix_top10_merged_us_in$show_title %in% top10_entry[["show_title"]]
  )
  if (length(top10_rows) == 0) {
    next
  }
  for (top10_col in setdiff(names(top10_entry), "show_title")) {
    netflix_top10_merged_us_in[top10_rows, top10_col] <- top10_entry[[top10_col]]
  }
}
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Hotel Transylvania",]$type <- "MOVIE"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Hotel Transylvania",]$release_year <- 2012
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Hotel Transylvania",]$age_certification <- "PG"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Hotel Transylvania",]$runtime <- 91
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Hotel Transylvania",]$genres <- "['comedy','animation', 'family', 'fantasy', 'horror','romance']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Hotel Transylvania",]$production_countries <- "['US']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Hotel Transylvania",]$imdb_score <- 7
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Hotel Transylvania",]$imdb_votes <- 263883
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "The Thing",]$type <- "MOVIE"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "The Thing",]$release_year <- 2011
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "The Thing",]$age_certification <- "R"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "The Thing",]$runtime <- 103
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "The Thing",]$genres <- "['horror', 'thriller', 'scifi']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "The Thing",]$production_countries <- "['US', 'CA']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "The Thing",]$imdb_score <- 6.2
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "The Thing",]$imdb_votes <- 138049
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Dr. Seuss' The Lorax",]$type <- "MOVIE"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Dr. Seuss' The Lorax",]$release_year <- 2012
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Dr. Seuss' The Lorax",]$age_certification <- "PG"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Dr. Seuss' The Lorax",]$runtime <- 86
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Dr. Seuss' The Lorax",]$genres <- "['animation', 'comedy', 'drama', 'family','fantasy']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Dr. Seuss' The Lorax",]$production_countries <- "['US']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Dr. Seuss' The Lorax",]$imdb_score <- 6.4
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Dr. Seuss' The Lorax",]$imdb_votes <- 118694
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "BEEF",]$type <- "SHOW"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "BEEF",]$release_year <- 2023
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "BEEF",]$age_certification <- "TV-MA"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "BEEF",]$runtime <- 35
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "BEEF",]$genres <- "['comedy', 'drama']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "BEEF",]$production_countries <- "['US']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "BEEF",]$seasons <- 1
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "BEEF",]$imdb_score <- 8.4
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "BEEF",]$imdb_votes <- 29991
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Unstable",]$type <- "SHOW"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Unstable",]$release_year <- 2023
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Unstable",]$age_certification <- "TV-14"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Unstable",]$runtime <- 25
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Unstable",]$genres <- "['drama','comedy']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Unstable",]$production_countries <- "['US']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Unstable",]$seasons <- 1
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Unstable",]$imdb_score <- 6.8
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Unstable",]$imdb_votes <- 4160
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Emergency: NYC",]$type <- "SHOW"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Emergency: NYC",]$release_year <- 2023
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Emergency: NYC",]$age_certification <- "TV-MA"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Emergency: NYC",]$runtime <- 42
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Emergency: NYC",]$genres <- "['documentation']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Emergency: NYC",]$production_countries <- "['US']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Emergency: NYC",]$seasons <- 1
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Emergency: NYC",]$imdb_score <- 8.3
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Emergency: NYC",]$imdb_votes <- 763
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Hoarders",]$type <- "SHOW"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Hoarders",]$release_year <- 2009
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Hoarders",]$age_certification <- "TV-14"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Hoarders",]$runtime <- 51
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Hoarders",]$genres <- "['documentation','reality','drama']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Hoarders",]$production_countries <- "['US']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Hoarders",]$seasons <- 13
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Hoarders",]$imdb_score <- 6.5
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Hoarders",]$imdb_votes <- 3261
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Transatlantic",]$type <- "SHOW"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Transatlantic",]$release_year <- 2023
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Transatlantic",]$age_certification <- "TV-14"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Transatlantic",]$runtime <- 50
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Transatlantic",]$genres <- "['war','drama','history']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Transatlantic",]$production_countries <- "['DE', 'FR']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Transatlantic",]$seasons <- 1
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Transatlantic",]$imdb_score <- 6.4
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Transatlantic",]$imdb_votes <- 2006
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "I See You",]$type <- "MOVIE"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "I See You",]$release_year <- 2019
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "I See You",]$age_certification <- "R"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "I See You",]$runtime <- 96
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "I See You",]$genres <- "['thriller', 'crime', 'drama','horror']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "I See You",]$production_countries <- "['US']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "I See You",]$imdb_score <- 6.8
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "I See You",]$imdb_votes <- 57202
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Dragged Across Concrete",]$type <- "MOVIE"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Dragged Across Concrete",]$release_year <- 2018
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Dragged Across Concrete",]$age_certification <- "R"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Dragged Across Concrete",]$runtime <- 159
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Dragged Across Concrete",]$genres <- "['thriller', 'action', 'crime', 'drama']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Dragged Across Concrete",]$production_countries <- "['US']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Dragged Across Concrete",]$imdb_score <- 6.9
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Dragged Across Concrete",]$imdb_votes <- 50464
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Diary of a Mad Black Woman",]$type <- "MOVIE"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Diary of a Mad Black Woman",]$release_year <- 2005
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Diary of a Mad Black Woman",]$age_certification <- "PG-13"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Diary of a Mad Black Woman",]$runtime <- 116
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Diary of a Mad Black Woman",]$genres <- "['romace', 'comedy', 'drama']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Diary of a Mad Black Woman",]$production_countries <- "['US']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Diary of a Mad Black Woman",]$imdb_score <- 5.7
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Diary of a Mad Black Woman",]$imdb_votes <- 15243
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Who Were We Running From?",]$type <- "SHOW"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Who Were We Running From?",]$release_year <- 2023
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Who Were We Running From?",]$age_certification <- "TV-MA"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Who Were We Running From?",]$runtime <- 41
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Who Were We Running From?",]$genres <- "['crime','drama','thriller']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Who Were We Running From?",]$production_countries <- "['TR']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Who Were We Running From?",]$seasons <- 1
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Who Were We Running From?",]$imdb_score <- 5.9
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Who Were We Running From?",]$imdb_votes <- 2545
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Waco: American Apocalypse",]$type <- "SHOW"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Waco: American Apocalypse",]$release_year <- 2023
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Waco: American Apocalypse",]$age_certification <- "TV-MA"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Waco: American Apocalypse",]$runtime <- 47
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Waco: American Apocalypse",]$genres <- "['documentation','crime']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Waco: American Apocalypse",]$production_countries <- "['US']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Waco: American Apocalypse",]$seasons <- 1
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Waco: American Apocalypse",]$imdb_score <- 7.0
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Waco: American Apocalypse",]$imdb_votes <- 4487
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Unseen",]$type <- "SHOW"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Unseen",]$release_year <- 2023
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Unseen",]$age_certification <- "TV-MA"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Unseen",]$runtime <- 45
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Unseen",]$genres <- "['drama','thriller','crime']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Unseen",]$production_countries <- "['ZA']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Unseen",]$seasons <- 1
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Unseen",]$imdb_score <- 5.9
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Unseen",]$imdb_votes <- 559
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Riddick",]$type <- "MOVIE"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Riddick",]$release_year <- 2013
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Riddick",]$age_certification <- "R"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Riddick",]$runtime <- 119
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Riddick",]$genres <- "['scifi', 'action', 'thriller']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Riddick",]$production_countries <- "['US,'CA']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Riddick",]$imdb_score <- 6.4
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Riddick",]$imdb_votes <- 171855
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "The Chronicles of Riddick",]$type <- "MOVIE"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "The Chronicles of Riddick",]$release_year <- 2004
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "The Chronicles of Riddick",]$age_certification <- "PG-13"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "The Chronicles of Riddick",]$runtime <- 134
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "The Chronicles of Riddick",]$genres <- "['scifi', 'action', 'thriller']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "The Chronicles of Riddick",]$production_countries <- "['US']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "The Chronicles of Riddick",]$imdb_score <- 6.6
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "The Chronicles of Riddick",]$imdb_votes <- 234594
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Pitch Black",]$type <- "MOVIE"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Pitch Black",]$release_year <- 2000
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Pitch Black",]$age_certification <- "R"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Pitch Black",]$runtime <- 108
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Pitch Black",]$genres <- "['thriller', 'action', 'horror', 'scifi']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Pitch Black",]$production_countries <- "['US']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Pitch Black",]$imdb_score <- 7.1
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Pitch Black",]$imdb_votes <- 245789
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Money Shot: The Pornhub Story",]$type <- "MOVIE"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Money Shot: The Pornhub Story",]$release_year <- 2023
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Money Shot: The Pornhub Story",]$age_certification <- "R"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Money Shot: The Pornhub Story",]$runtime <- 95
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Money Shot: The Pornhub Story",]$genres <- "['documentation']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Money Shot: The Pornhub Story",]$production_countries <- "['US']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Money Shot: The Pornhub Story",]$imdb_score <- 5.4
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Money Shot: The Pornhub Story",]$imdb_votes <- 3386
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Kick-Ass 2",]$type <- "MOVIE"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Kick-Ass 2",]$release_year <- 2013
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Kick-Ass 2",]$age_certification <- "R"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Kick-Ass 2",]$runtime <- 103
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Kick-Ass 2",]$genres <- "['action', 'comedy', 'crime', 'thriller']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Kick-Ass 2",]$production_countries <- "['UK', 'US', 'JP']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Kick-Ass 2",]$imdb_score <- 6.5
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Kick-Ass 2",]$imdb_votes <- 278748
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Bad Boys II",]$type <- "MOVIE"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Bad Boys II",]$release_year <- 2003
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Bad Boys II",]$age_certification <- "R"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Bad Boys II",]$runtime <- 147
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Bad Boys II",]$genres <- "['comedy', 'thriller', 'crime', 'action']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Bad Boys II",]$production_countries <- "['US']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Bad Boys II",]$imdb_score <- 6.6
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Bad Boys II",]$imdb_votes <- 257144
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Battleship",]$type <- "MOVIE"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Battleship",]$release_year <- 2012
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Battleship",]$age_certification <- "PG-13"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Battleship",]$runtime <- 131
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Battleship",]$genres <- "['thriller', 'action', 'scifi', 'documentation']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Battleship",]$production_countries <- "['US']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Battleship",]$imdb_score <- 5.8
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Battleship",]$imdb_votes <- 251488
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "The God Committee",]$type <- "MOVIE"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "The God Committee",]$release_year <- 2021
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "The God Committee",]$age_certification <- "TV-MA"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "The God Committee",]$runtime <- 98
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "The God Committee",]$genres <- "['drama', 'thriller']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "The God Committee",]$production_countries <- "['US']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "The God Committee",]$imdb_score <- 5.8
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "The God Committee",]$imdb_votes <- 4329
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Dr. Seuss' The Cat in the Hat",]$type <- "MOVIE"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Dr. Seuss' The Cat in the Hat",]$release_year <- 2003
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Dr. Seuss' The Cat in the Hat",]$age_certification <- "PG"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Dr. Seuss' The Cat in the Hat",]$runtime <- 81
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Dr. Seuss' The Cat in the Hat",]$genres <- "['comedy', 'family', 'fantasy']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Dr. Seuss' The Cat in the Hat",]$production_countries <- "['US']"
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Dr. Seuss' The Cat in the Hat",]$imdb_score <- 4
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Dr. Seuss' The Cat in the Hat",]$imdb_votes <- 84828
netflix_top10_merged_us_in[netflix_top10_merged_us_in$show_title == "Dr. Seuss' The Cat in the Hat",]$type <- "MOVIE"
# Rows whose `type` is still NA have no metadata at all. They were listed with
#   unique(netflix_top10_merged_us_in[is.na(netflix_top10_merged_us_in$type), "show_title"])
# which returned 215 titles; these aren't on Netflix anymore, so drop them.
netflix_top10_merged_us_in <- netflix_top10_merged_us_in[
  which(!is.na(netflix_top10_merged_us_in$type)),
]
# Resolve chart slots that ended up with more than one row. They can be
# listed with:
#   netflix_top10_merged_us_in %>%
#     group_by(country_name, week, category, weekly_rank) %>%
#     filter(n() > 1)
# Each statement below removes the rows for one title + release year (or
# title + type) within a week range, in some cases only on India's chart.
# NOTE(review): presumably the removed rows are same-named works that got
# attached to the chart entry by the metadata merge -- confirm against the
# merge step earlier in the file.
netflix_top10_merged_us_in <- netflix_top10_merged_us_in[
  !(
    netflix_top10_merged_us_in$country_iso2 == "IN" &
      netflix_top10_merged_us_in$week >= "2023-01-22" &
      netflix_top10_merged_us_in$title == "Dhamaka" &
      netflix_top10_merged_us_in$release_year == 2021
  ),
]
netflix_top10_merged_us_in <- netflix_top10_merged_us_in[
  !(
    netflix_top10_merged_us_in$country_iso2 == "IN" &
      netflix_top10_merged_us_in$week >= "2022-11-06" &
      netflix_top10_merged_us_in$title == "The Ghost" &
      netflix_top10_merged_us_in$release_year == 2007
  ),
]
netflix_top10_merged_us_in <- netflix_top10_merged_us_in[
  !(
    netflix_top10_merged_us_in$week <= "2022-07-31" &
      netflix_top10_merged_us_in$title == "Resident Evil" &
      netflix_top10_merged_us_in$type == "MOVIE"
  ),
]
netflix_top10_merged_us_in <- netflix_top10_merged_us_in[
  !(
    netflix_top10_merged_us_in$country_iso2 == "IN" &
      netflix_top10_merged_us_in$week >= "2022-06-12" &
      netflix_top10_merged_us_in$title == "Don" &
      netflix_top10_merged_us_in$release_year == 2006
  ),
]
netflix_top10_merged_us_in <- netflix_top10_merged_us_in[
  !(
    netflix_top10_merged_us_in$country_iso2 == "IN" &
      netflix_top10_merged_us_in$week <= "2021-12-19" &
      netflix_top10_merged_us_in$title == "Dhamaka" &
      netflix_top10_merged_us_in$release_year == 2022
  ),
]
netflix_top10_merged_us_in <- netflix_top10_merged_us_in[
  !(
    netflix_top10_merged_us_in$week <= "2021-12-05" &
      netflix_top10_merged_us_in$title == "Cowboy Bebop" &
      netflix_top10_merged_us_in$release_year == 1998
  ),
]
netflix_top10_merged_us_in <- netflix_top10_merged_us_in[
  !(
    netflix_top10_merged_us_in$week >= "2021-12-05" &
      netflix_top10_merged_us_in$title == "Life" &
      netflix_top10_merged_us_in$release_year == 2018
  ),
]
netflix_top10_merged_us_in <- netflix_top10_merged_us_in[
  !(
    netflix_top10_merged_us_in$week >= "2022-10-23" &
      netflix_top10_merged_us_in$title == "The Stranger" &
      netflix_top10_merged_us_in$type == "SHOW"
  ),
]
netflix_top10_merged_us_in <- netflix_top10_merged_us_in[
  !(
    netflix_top10_merged_us_in$week == "2022-10-23" &
      netflix_top10_merged_us_in$title == "The Mole" &
      netflix_top10_merged_us_in$release_year == 2001
  ),
]
netflix_top10_merged_us_in <- netflix_top10_merged_us_in[
  !(
    netflix_top10_merged_us_in$week == "2022-03-27" &
      netflix_top10_merged_us_in$title == "Top Boy" &
      netflix_top10_merged_us_in$release_year == 2011
  ),
]
netflix_top10_merged_us_in <- netflix_top10_merged_us_in[
  !(
    netflix_top10_merged_us_in$week == "2021-10-03" &
      netflix_top10_merged_us_in$title == "Till Death" &
      netflix_top10_merged_us_in$type == "SHOW"
  ),
]
# Re-running the check from the top of this section:
#   netflix_top10_merged_us_in %>%
#     group_by(country_name, week, category, weekly_rank) %>%
#     filter(n() > 1)
# ^ now returns no duplicates
# Return the first entry of a stringified list such as "['a', 'b']":
# square brackets are removed, the text is split on commas, the first piece
# is kept and its single quotes are stripped.
first_listed_value <- function(x) {
  without_brackets <- gsub("\\[|\\]", "", x)
  gsub("'", "", str_split_i(without_brackets, ",", 1))
}

# Reduce genres and production_countries to their first listed value
netflix_top10_merged_us_in$genres <-
  first_listed_value(netflix_top10_merged_us_in$genres)
netflix_top10_merged_us_in$production_countries <-
  first_listed_value(netflix_top10_merged_us_in$production_countries)

# Only consider content that was made in US or India. `%in%` never yields NA,
# so a missing production country drops the row instead of turning it into an
# all-NA row the way `== "US" | == "IN"` indexing would.
netflix_top10_merged_us_in <- netflix_top10_merged_us_in[
  netflix_top10_merged_us_in$production_countries %in% c("US", "IN"),
]

# Split by the chart the title appeared on: US top charts ...
netflix_top10_merged_us <- netflix_top10_merged_us_in[
  netflix_top10_merged_us_in$country_iso2 == "US",
]
# ... and India top charts
netflix_top10_merged_in <- netflix_top10_merged_us_in[
  netflix_top10_merged_us_in$country_iso2 == "IN",
]

# Keep one row per title (its first occurrence) in each of the three tables
unique_netflix_top10_merged_us_in <- netflix_top10_merged_us_in[
  !duplicated(netflix_top10_merged_us_in$title),
]
unique_netflix_top10_merged_us <- netflix_top10_merged_us[
  !duplicated(netflix_top10_merged_us$title),
]
unique_netflix_top10_merged_in <- netflix_top10_merged_in[
  !duplicated(netflix_top10_merged_in$title),
]
Netflix Plots
# Percentage of my streamed titles per production country within each year.
# `.drop = FALSE` keeps factor levels of `country` that have no rows in a
# group, so they appear with a count of 0. After summarise() the innermost
# grouping (country) is dropped, so sum(count) is the total for the year.
my_unique_titles_us_in_grouped <- my_unique_titles_us_in %>%
  group_by(
    year = year(Date),
    country = as.factor(production_countries),
    .drop = FALSE
  ) %>%
  summarise(count = n()) %>%
  mutate(
    prop = (count / sum(count)) * 100,
    rounded_prop = round(prop, 1),
    country_full = ifelse(country == "IN", "India", "United States")
  )

# Same breakdown computed separately for each content type; here sum(count)
# is the total for the type/year combination.
my_unique_titles_us_in_by_type <- my_unique_titles_us_in %>%
  group_by(
    type,
    year = year(Date),
    country = as.factor(production_countries),
    .drop = FALSE
  ) %>%
  summarise(count = n()) %>%
  mutate(
    prop = (count / sum(count)) * 100,
    rounded_prop = round(prop, 1),
    country_full = ifelse(country == "IN", "India", "United States"),
    new_type = ifelse(type == "MOVIE", "Movies", "Shows")
  )
# US weekly top-10 titles: percentage per production country within each
# chart year. Empty factor levels of `country` are kept (`.drop = FALSE`);
# sum(count) runs over the year because summarise() drops the innermost
# grouping (country).
unique_netflix_top10_merged_us_grouped <- unique_netflix_top10_merged_us %>%
  group_by(
    year = year(week),
    country = as.factor(production_countries),
    .drop = FALSE
  ) %>%
  summarise(count = n()) %>%
  mutate(
    prop = (count / sum(count)) * 100,
    rounded_prop = round(prop, 1),
    country_full = ifelse(country == "IN", "India", "United States")
  )

# Same breakdown, computed within each content type and year.
unique_netflix_top10_merged_us_by_type <- unique_netflix_top10_merged_us %>%
  group_by(
    type,
    year = year(week),
    country = as.factor(production_countries),
    .drop = FALSE
  ) %>%
  summarise(count = n()) %>%
  mutate(
    prop = (count / sum(count)) * 100,
    rounded_prop = round(prop, 1),
    country_full = ifelse(country == "IN", "India", "United States"),
    new_type = ifelse(type == "MOVIE", "Movies", "Shows")
  )
# India weekly top-10 titles: percentage per production country within each
# chart year. Empty factor levels of `country` are kept (`.drop = FALSE`);
# sum(count) runs over the year because summarise() drops the innermost
# grouping (country).
unique_netflix_top10_merged_in_grouped <- unique_netflix_top10_merged_in %>%
  group_by(
    year = year(week),
    country = as.factor(production_countries),
    .drop = FALSE
  ) %>%
  summarise(count = n()) %>%
  mutate(
    prop = (count / sum(count)) * 100,
    rounded_prop = round(prop, 1),
    country_full = ifelse(country == "IN", "India", "United States")
  )

# Same breakdown, computed within each content type and year.
unique_netflix_top10_merged_in_by_type <- unique_netflix_top10_merged_in %>%
  group_by(
    type,
    year = year(week),
    country = as.factor(production_countries),
    .drop = FALSE
  ) %>%
  summarise(count = n()) %>%
  mutate(
    prop = (count / sum(count)) * 100,
    rounded_prop = round(prop, 1),
    country_full = ifelse(country == "IN", "India", "United States"),
    new_type = ifelse(type == "MOVIE", "Movies", "Shows")
  )
# Build the interactive charts (plotly directly, or ggplot2 via ggplotly).
# p1: bar chart of my streamed titles, percentage per production country by
# year, with a custom hover label showing country, percentage and count.
p1 <- plot_ly(
  my_unique_titles_us_in_grouped,
  x = ~year,
  y = ~prop,
  color = ~country_full,
  type = "bar",
  hoverinfo = "text",
  hovertext = paste(
    "Country:",
    my_unique_titles_us_in_grouped$country_full,
    "\nPercentage:",
    paste0(my_unique_titles_us_in_grouped$rounded_prop, "%"),
    "\nCount:",
    my_unique_titles_us_in_grouped$count
  ),
  colors = c("#128807", "#0A3161")
) %>%
  layout(
    xaxis = list(title = "Year"),
    yaxis = list(title = "Percentage", range = c(0, 100))
  )

# p2: the same measure faceted by content type (Movies / Shows). The `text`
# aesthetic only carries the tooltip that ggplotly() displays. The legend and
# the x-axis text and title are switched off in the theme.
p2 <- ggplot(
  my_unique_titles_us_in_by_type,
  aes(
    x = year,
    y = prop,
    fill = as.factor(country_full),
    text = paste(
      "Country:", country_full,
      "\nPercentage:", paste0(rounded_prop, "%"),
      "\nCount:", count
    )
  )
) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~new_type) +
  theme_minimal() +
  theme(
    legend.position = "none",
    panel.grid.major.x = element_blank(),
    axis.text.x = element_blank(),
    axis.title.x = element_blank()
  ) +
  scale_fill_manual(values = c("#128807", "#0A3161")) +
  labs(x = "Year Streamed", y = "Percentage")
p2 <- ggplotly(p2, tooltip = "text")
# p3: bar chart of US weekly top-10 titles, percentage per production country
# by year, with a custom hover label. The legend is hidden on this panel.
p3 <- plot_ly(
  unique_netflix_top10_merged_us_grouped,
  x = ~year,
  y = ~prop,
  color = ~country_full,
  type = "bar",
  hoverinfo = "text",
  hovertext = paste(
    "Country:",
    unique_netflix_top10_merged_us_grouped$country_full,
    "\nPercentage:",
    paste0(unique_netflix_top10_merged_us_grouped$rounded_prop, "%"),
    "\nCount:",
    unique_netflix_top10_merged_us_grouped$count
  ),
  colors = c("#128807", "#0A3161")
) %>%
  layout(
    xaxis = list(title = "Year"),
    yaxis = list(title = "Percentage"),
    # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned
    showlegend = FALSE
  )

# p4: the same measure faceted by content type. The facet strip labels, the
# legend and the x-axis text and title are all switched off in the theme. The
# `text` aesthetic only carries the tooltip that ggplotly() displays.
p4 <- ggplot(
  unique_netflix_top10_merged_us_by_type,
  aes(
    x = year,
    y = prop,
    fill = as.factor(country_full),
    text = paste(
      "Country:", country_full,
      "\nPercentage:", paste0(rounded_prop, "%"),
      "\nCount:", count
    )
  )
) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~new_type) +
  theme_minimal() +
  theme(
    legend.position = "none",
    panel.grid.major.x = element_blank(),
    strip.background = element_blank(),
    strip.text.x = element_blank(),
    axis.text.x = element_blank(),
    axis.title.x = element_blank()
  ) +
  scale_fill_manual(values = c("#128807", "#0A3161")) +
  labs(x = "Year Streamed", y = "Percentage")
p4 <- ggplotly(p4, tooltip = "text")
# Bar chart of `prop` by `year`, coloured by country, for the India Top 10
# table. Hover text is built eagerly from the data frame columns; the y axis
# is pinned to 0-100 and the legend is hidden.
p5 <- plot_ly(
  unique_netflix_top10_merged_in_grouped,
  x = ~year,
  y = ~prop,
  color = ~country_full,
  type = "bar",
  hoverinfo = "text",
  hovertext = paste(
    "Country:",
    unique_netflix_top10_merged_in_grouped$country_full,
    "\nPercentage:",
    paste0(unique_netflix_top10_merged_in_grouped$rounded_prop, "%"),
    "\nCount:",
    unique_netflix_top10_merged_in_grouped$count
  ),
  colors = c("#128807", "#0A3161")
) %>%
  layout(
    xaxis = list(title = "Year"),
    yaxis = list(title = "Percentage", range = c(0, 100)),
    showlegend = FALSE
  )
# Dodged bars of `prop` by `year`, filled by country and faceted by `new_type`,
# with the y axis limited to 0-100. Facet strips are blanked out but the
# x-axis text is kept; the `text` aesthetic supplies the plotly tooltip.
p6 <- (
  ggplot(
    unique_netflix_top10_merged_in_by_type,
    aes(
      x = year,
      y = prop,
      fill = as.factor(country_full),
      text = paste(
        "Country:", country_full,
        "\nPercentage:", paste0(rounded_prop, "%"),
        "\nCount:", count
      )
    )
  ) +
    geom_bar(stat = "identity", position = "dodge") +
    ylim(0, 100) +
    facet_wrap(~new_type) +
    theme_minimal() +
    theme(
      legend.position = "none",
      panel.grid.major.x = element_blank(),
      strip.background = element_blank(),
      strip.text.x = element_blank()
    ) +
    scale_fill_manual(values = c("#128807", "#0A3161")) +
    labs(x = "Year Streamed", y = "Percentage")
) %>%
  ggplotly(tooltip = "text")
# Make a subplot of these plotly plots
# p1, p3 and p5 are placed side by side with a shared y axis. Each panel gets
# a header annotation positioned in paper coordinates; the headers differ only
# in position and text, so the common anchoring attributes are appended once.
subplot1 <- subplot(p1, p3, p5, shareY = TRUE) %>%
  layout(
    title = paste0(
      "My Streamed Netflix Content and India's Weekly Top 10 Content\n",
      "Have More Variety in Production Country"
    ),
    margin = list(l = 50, r = 50, b = 50, t = 150, pad = 4)
  ) %>%
  layout(
    annotations = lapply(
      list(
        list(x = 0.15, y = 1.1, text = "My Streamed Netflix Content"),
        list(
          x = 0.53, y = 1.05,
          text = "Netflix Weekly Top 10\n Content- United States"
        ),
        list(
          x = 0.85, y = 1.05,
          text = "Netflix Weekly Top 10\n Content- India"
        )
      ),
      function(header) {
        c(
          header,
          list(
            xref = "paper",
            yref = "paper",
            xanchor = "center",
            yanchor = "bottom",
            showarrow = FALSE
          )
        )
      }
    )
  ) %>%
  # Shared x-axis title, centred below the three panels
  add_annotations(
    text = "Year Streamed",
    x = 0.5,
    y = 0,
    xref = "paper",
    yref = "paper",
    xanchor = "center",
    yanchor = "bottom",
    yshift = -45,
    showarrow = FALSE,
    font = list(size = 14)
  )
# Make a subplot of these plotly plots
# p2, p4 and p6 are stacked in three rows. Row labels sit to the right of the
# panels (x = 1.08 in paper coordinates); they differ only in height and text,
# so the common anchoring and font attributes are appended once.
subplot2 <- subplot(p2, p4, p6, nrows = 3) %>%
  layout(
    title = paste0(
      "I have Watched More Indian Movies and More American Shows in\n",
      "Recent Years, Similar to the Highest Streamed Content in India"
    ),
    font = list(size = 12),
    margin = list(l = 75, r = 100, b = 50, t = 100, pad = 4)
  ) %>%
  layout(
    annotations = lapply(
      list(
        list(x = 1.08, y = 0.8, text = "My Streamed\n Netflix Content"),
        list(
          x = 1.08, y = 0.45,
          text = "Netflix Weekly\n Top 10 Content- \nUnited States"
        ),
        list(
          x = 1.08, y = 0.1,
          text = "Netflix Weekly \nTop 10 Content- \nIndia"
        )
      ),
      function(row_label) {
        c(
          row_label,
          list(
            xref = "paper",
            yref = "paper",
            xanchor = "center",
            yanchor = "bottom",
            showarrow = FALSE,
            font = list(size = 10)
          )
        )
      }
    )
  ) %>%
  # Shared x-axis title, centred below the bottom row
  add_annotations(
    text = "Year Streamed",
    x = 0.5,
    y = 0,
    xref = "paper",
    yref = "paper",
    xanchor = "center",
    yanchor = "bottom",
    yshift = -45,
    showarrow = FALSE,
    font = list(size = 14)
  ) %>%
  # Shared y-axis title, rotated and shifted left of the panels
  add_annotations(
    text = "Percentage",
    x = 0,
    y = 0.4,
    xref = "paper",
    yref = "paper",
    xanchor = "center",
    yanchor = "bottom",
    xshift = -45,
    showarrow = FALSE,
    font = list(size = 14),
    textangle = -90
  )
subplot1
subplot2
# Explore top genres for each of the datasets
# Each call tabulates the `genres` column of one data frame and prints the
# counts from most to least frequent. The lines starting with "##" are console
# output captured from an earlier run and are kept only for reference.
sort(table(my_unique_titles_us_in$genres), decreasing=TRUE)
##
## drama comedy thriller romance action
## 19 13 8 7 2
## fantasy crime documentation family history
## 2 1 1 1 1
## reality scifi
## 1 1
# Separator between the printed tables
print("________")
## [1] "________"
# NOTE(review): the "romace" level in the output below looks like a misspelled
# "romance" value in the underlying data; confirm and clean it upstream.
sort(table(unique_netflix_top10_merged_us$genres), decreasing=TRUE)
##
## drama comedy documentation thriller reality
## 88 66 42 35 30
## action crime scifi romance animation
## 26 19 18 15 13
## horror fantasy family war romace
## 13 8 2 2 1
# Separator between the printed tables
print("________")
## [1] "________"
sort(table(unique_netflix_top10_merged_in$genres), decreasing=TRUE)
##
## drama comedy thriller action crime
## 94 52 36 29 19
## romance scifi documentation horror fantasy
## 17 11 9 9 5
## reality family history animation war
## 5 2 2 1 1
# Get my Netflix content (all of it, not just unique titles) that was made in US or India
# `%in%` never returns NA, so rows whose production country is missing are
# simply excluded instead of coming back as all-NA rows (which `==` produces).
netflix_merged_us_in <- netflix_merged[
  netflix_merged$production_countries %in% c("US", "IN"),
]
# Get counts of genres for each month, making sure the same title is only counted once in a week and only pick the popular genres across all the datasets that I just explored
# Genres kept for plotting; shares are computed over ALL genres first and the
# table is filtered down to these afterwards.
popular_genres <- c("drama", "comedy", "thriller", "action", "romance")

# My own history: de-duplicate to one row per (month, week, title, genre), then
# count per month and genre. `.drop = FALSE` belongs on the group_by() that
# feeds summarize() so that genres not watched in a month appear with count 0,
# exactly as in the two Top 10 tables below.
my_netflix_genres_by_month <- netflix_merged_us_in %>%
  mutate(
    month_num = as.Date(as.yearmon(Date)),
    week_num = cut.Date(Date, breaks = "1 week", labels = FALSE),
    genres = as.factor(genres)
  ) %>%
  distinct(month_num, week_num, Name, genres) %>%
  group_by(month_num, genres, .drop = FALSE) %>%
  # Keep the month grouping so `prop` is a within-month share
  summarize(count = n(), .groups = "drop_last") %>%
  mutate(
    prop = (count / sum(count)) * 100,
    rounded_prop = round(prop, 1)
  ) %>%
  filter(genres %in% popular_genres)

# US weekly Top 10: one row per chart entry, counted per month and genre
netflix_top10_us_genres_by_month <- netflix_top10_merged_us %>%
  mutate(month_num = as.Date(as.yearmon(week))) %>%
  group_by(month_num, genres = as.factor(genres), .drop = FALSE) %>%
  summarize(count = n(), .groups = "drop_last") %>%
  mutate(
    prop = (count / sum(count)) * 100,
    rounded_prop = round(prop, 1)
  ) %>%
  filter(genres %in% popular_genres)

# India weekly Top 10: same computation as for the US chart
netflix_top10_in_genres_by_month <- netflix_top10_merged_in %>%
  mutate(month_num = as.Date(as.yearmon(week))) %>%
  group_by(month_num, genres = as.factor(genres), .drop = FALSE) %>%
  summarize(count = n(), .groups = "drop_last") %>%
  mutate(
    prop = (count / sum(count)) * 100,
    rounded_prop = round(prop, 1)
  ) %>%
  filter(genres %in% popular_genres)
# Create plots using ggplotly of this data
# One line per genre (five facets in a row) of `prop` by month. X-axis text
# and title are blanked out; the `text` aesthetic supplies the plotly tooltip.
p7 <- (
  ggplot(
    my_netflix_genres_by_month,
    aes(
      x = month_num,
      y = prop,
      group = genres,
      color = genres,
      text = paste(
        "Month:", format(as.Date(month_num), "%B %Y"),
        "\nPercentage:", paste0(rounded_prop, "%"),
        "\nCount:", count
      )
    )
  ) +
    geom_line() +
    facet_wrap(~genres, ncol = 5) +
    theme_minimal() +
    theme(
      legend.position = "none",
      axis.text.x = element_blank(),
      axis.title.x = element_blank()
    ) +
    scale_x_date(date_labels = "%b %Y") +
    scale_color_brewer(palette = "Dark2")
) %>%
  ggplotly(tooltip = "text")
# One line per genre (five facets in a row) of `prop` by month, y axis limited
# to 0-100. Facet strips and x-axis text/title are blanked out; the `text`
# aesthetic supplies the plotly tooltip.
p8 <- (
  ggplot(
    netflix_top10_us_genres_by_month,
    aes(
      x = month_num,
      y = prop,
      group = genres,
      color = genres,
      text = paste(
        "Month:", format(as.Date(month_num), "%B %Y"),
        "\nPercentage:", paste0(rounded_prop, "%"),
        "\nCount:", count
      )
    )
  ) +
    geom_line() +
    facet_wrap(~genres, ncol = 5) +
    theme_minimal() +
    theme(
      legend.position = "none",
      strip.background = element_blank(),
      strip.text.x = element_blank(),
      axis.text.x = element_blank(),
      axis.title.x = element_blank()
    ) +
    scale_x_date(date_labels = "%b %Y") +
    ylim(0, 100) +
    scale_color_brewer(palette = "Dark2")
) %>%
  ggplotly(tooltip = "text")
# One line per genre (five facets in a row) of `prop` by month, y axis limited
# to 0-100. Facet strips are blanked out and the x-axis labels are rotated 90
# degrees; the `text` aesthetic supplies the plotly tooltip.
p9 <- (
  ggplot(
    netflix_top10_in_genres_by_month,
    aes(
      x = month_num,
      y = prop,
      group = genres,
      color = genres,
      text = paste(
        "Month:", format(as.Date(month_num), "%B %Y"),
        "\nPercentage:", paste0(rounded_prop, "%"),
        "\nCount:", count
      )
    )
  ) +
    geom_line() +
    facet_wrap(~genres, ncol = 5) +
    theme_minimal() +
    theme(
      legend.position = "none",
      strip.background = element_blank(),
      strip.text.x = element_blank(),
      axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
    ) +
    scale_x_date(date_labels = "%b %Y") +
    ylim(0, 100) +
    scale_color_brewer(palette = "Dark2")
) %>%
  ggplotly(tooltip = "text")
# Create subplot of these plots
# p7, p8 and p9 are stacked in three rows. Row labels sit to the right of the
# panels (x = 1.08 in paper coordinates); they differ only in height and text,
# so the common anchoring and font attributes are appended once.
subplot3 <- subplot(p7, p8, p9, nrows = 3) %>%
  layout(
    title = paste0(
      "My General Viewing Habits Across Genres Over Time\n",
      "Differ from the Similar Habits of the General United States\n",
      "and Indian Netflix Audience"
    ),
    font = list(size = 10),
    margin = list(l = 75, r = 100, b = 100, t = 125, pad = 4)
  ) %>%
  layout(
    annotations = lapply(
      list(
        list(x = 1.08, y = 0.8, text = "My Streamed\n Netflix Content"),
        list(
          x = 1.08, y = 0.45,
          text = "Netflix Weekly\n Top 10 Content- \nUnited States"
        ),
        list(
          x = 1.08, y = 0.1,
          text = "Netflix Weekly \nTop 10 Content- \nIndia"
        )
      ),
      function(row_label) {
        c(
          row_label,
          list(
            xref = "paper",
            yref = "paper",
            xanchor = "center",
            yanchor = "bottom",
            showarrow = FALSE,
            font = list(size = 10)
          )
        )
      }
    )
  ) %>%
  # Shared x-axis title; shifted further down than in the other subplots
  # (yshift = -90), below the rotated month labels of the bottom row
  add_annotations(
    text = "Month Streamed",
    x = 0.5,
    y = 0,
    xref = "paper",
    yref = "paper",
    xanchor = "center",
    yanchor = "bottom",
    yshift = -90,
    showarrow = FALSE,
    font = list(size = 14)
  ) %>%
  # Shared y-axis title, rotated and shifted left of the panels
  add_annotations(
    text = "Percentage",
    x = 0,
    y = 0.4,
    xref = "paper",
    yref = "paper",
    xanchor = "center",
    yanchor = "bottom",
    xshift = -45,
    showarrow = FALSE,
    font = list(size = 14),
    textangle = -90
  )
subplot3
# Explore missings for imdb score column and manually input imdb score
# netflix_merged_us_in[is.na(netflix_merged_us_in$imdb_score),]
# Hand-entered IMDb scores keyed by title. Looking the title up in this table
# (instead of `df[cond, ]$col <- value`) is a no-op when a title is absent and
# tolerates NA names, where the row-subset assignment raises an error.
my_manual_imdb_scores <- c(
  "Oh! Baby" = 7.4,
  "The Fame Game" = 6.7,
  "Red" = 6.5
)
netflix_merged_us_in <- netflix_merged_us_in %>%
  mutate(
    # A manual score, when present, overrides whatever the column held
    imdb_score = coalesce(
      unname(my_manual_imdb_scores[as.character(Name)]),
      imdb_score
    )
  )
# netflix_merged_us_in[is.na(netflix_merged_us_in$imdb_score),]
# ^ no more missing imdb scores
# Group by year/month and production country and get mean imdb score
# (`.groups = "drop_last"` spells out summarize()'s default regrouping)
my_netflix_avg_imdb_country <- netflix_merged_us_in %>%
  group_by(year = year(Date), production_countries) %>%
  summarize(
    count = n(),
    mean_imdb = mean(imdb_score),
    .groups = "drop_last"
  ) %>%
  mutate(round_score = round(mean_imdb, 1))
my_netflix_avg_imdb_country_month <- netflix_merged_us_in %>%
  group_by(
    month = as.Date(as.yearmon(Date)),
    country = production_countries
  ) %>%
  summarize(
    count = n(),
    mean_imdb = mean(imdb_score),
    .groups = "drop_last"
  ) %>%
  mutate(round_score = round(mean_imdb, 1))
# netflix_top10_merged_us[is.na(netflix_top10_merged_us$imdb_score),]
# ^Explore missing imdb scores
# Manually input imdb scores
# Hand-entered IMDb scores keyed by show title. Looking the title up in this
# table (instead of `df[cond, ]$col <- value`) is a no-op when a title is
# absent and tolerates NA titles, where the row-subset assignment errors.
us_manual_imdb_scores <- c(
  "Outlast" = 4.8,
  "Blackout" = 3.8,
  "Jackass 4.5" = 6.5,
  "Fear Street Part 3: 1666" = 6.6
)
netflix_top10_merged_us <- netflix_top10_merged_us %>%
  mutate(
    # A manual score, when present, overrides whatever the column held
    imdb_score = coalesce(
      unname(us_manual_imdb_scores[as.character(show_title)]),
      imdb_score
    )
  )
# netflix_top10_merged_us[is.na(netflix_top10_merged_us$imdb_score),]
# ^no more missing imdb scores
# Mean IMDb score per year / per month and production country
# (`.groups = "drop_last"` spells out summarize()'s default regrouping)
netflix_top10_us_avg_imbd_country <- netflix_top10_merged_us %>%
  group_by(year = year(week), production_countries) %>%
  summarize(
    count = n(),
    mean_imdb = mean(imdb_score),
    .groups = "drop_last"
  ) %>%
  mutate(round_score = round(mean_imdb, 1))
netflix_top10_us_avg_imbd_country_month <- netflix_top10_merged_us %>%
  group_by(month = as.Date(as.yearmon(week)), production_countries) %>%
  summarize(
    count = n(),
    mean_imdb = mean(imdb_score),
    .groups = "drop_last"
  ) %>%
  mutate(round_score = round(mean_imdb, 1))
# netflix_top10_merged_in[is.na(netflix_top10_merged_in$imdb_score),]
# ^ Explore missing imdb scores
# Manually input imdb scores
# Hand-entered IMDb scores keyed by show title. Looking the title up in this
# table (instead of `df[cond, ]$col <- value`) is a no-op when a title is
# absent and tolerates NA titles, where the row-subset assignment errors.
in_manual_imdb_scores <- c(
  "Rana Naidu" = 7.3,
  "The Elephant Whisperers" = 7.5,
  "Kaapa" = 6.1,
  "Varalaru Mukkiyam" = 4.1,
  # Two chart titles that were given the same score
  "Gatta Kusthi" = 6.5,
  "Matti Kusthi" = 6.5,
  "CAT" = 8.2,
  "Kalagathalaivan" = 6.7,
  "She" = 6.4,
  "Mai: A Mother's Rage" = 7.1,
  "Night Drive" = 6.7,
  "The Fame Game" = 6.7,
  "Monster Hunter" = 5.2,
  "Fear Street Part 3: 1666" = 6.6,
  "Sniper: Ghost Shooter" = 5.4,
  "Raangi" = 4.8
)
netflix_top10_merged_in <- netflix_top10_merged_in %>%
  mutate(
    # A manual score, when present, overrides whatever the column held
    imdb_score = coalesce(
      unname(in_manual_imdb_scores[as.character(show_title)]),
      imdb_score
    )
  )
# netflix_top10_merged_in[is.na(netflix_top10_merged_in$imdb_score),]
# ^no more missings
# Mean IMDb score per year / per month and production country
# (`.groups = "drop_last"` spells out summarize()'s default regrouping)
netflix_top10_in_avg_imbd_country <- netflix_top10_merged_in %>%
  group_by(year = year(week), production_countries) %>%
  summarize(
    count = n(),
    mean_imdb = mean(imdb_score),
    .groups = "drop_last"
  ) %>%
  mutate(round_score = round(mean_imdb, 1))
netflix_top10_in_avg_imbd_country_month <- netflix_top10_merged_in %>%
  group_by(month = as.Date(as.yearmon(week)), production_countries) %>%
  summarize(
    count = n(),
    mean_imdb = mean(imdb_score),
    .groups = "drop_last"
  ) %>%
  mutate(round_score = round(mean_imdb, 1))
# Make plots using ggplotly
# p10: mean IMDb score per year, one line per production country, y axis
# limited to 4-9. X-axis text/title are blanked out.
p10 <- (
  ggplot(
    my_netflix_avg_imdb_country,
    aes(
      x = year,
      y = mean_imdb,
      color = production_countries,
      group = production_countries,
      text = paste(
        "Country:", production_countries,
        "\nYear:", year,
        "\nAverage IMDb Score:", round_score,
        "\nCount:", count
      )
    )
  ) +
    geom_point() +
    geom_line() +
    scale_x_continuous(breaks = seq(2021, 2023, by = 1)) +
    scale_color_manual(values = c("#128807", "#0A3161")) +
    theme_minimal() +
    theme(
      legend.position = "none",
      axis.text.x = element_blank(),
      axis.title.x = element_blank()
    ) +
    labs(x = "Year Streamed", y = "Average IMDb Score") +
    ylim(4, 9)
) %>%
  ggplotly(tooltip = "text")
# p11: the same measure per month; y-axis text and ticks are also blanked out
p11 <- (
  ggplot(
    my_netflix_avg_imdb_country_month,
    aes(
      x = month,
      y = mean_imdb,
      color = country,
      group = country,
      text = paste(
        "Country:", country,
        "\nMonth:", format(as.Date(month), "%B %Y"),
        "\nAverage IMDb Score:", round_score,
        "\nCount:", count
      )
    )
  ) +
    geom_point() +
    geom_line() +
    scale_x_date(date_labels = "%b %Y") +
    scale_color_manual(values = c("#128807", "#0A3161")) +
    theme_minimal() +
    theme(
      legend.position = "none",
      axis.text.x = element_blank(),
      axis.title.x = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank()
    ) +
    labs(x = "Month Streamed", y = "Average IMDb Score") +
    ylim(4, 9)
) %>%
  ggplotly(tooltip = "text")
# p12: mean IMDb score per year, one line per production country, y axis
# limited to 4-9. X-axis text/title are blanked out.
p12 <- (
  ggplot(
    netflix_top10_us_avg_imbd_country,
    aes(
      x = year,
      y = mean_imdb,
      color = production_countries,
      group = production_countries,
      text = paste(
        "Country:", production_countries,
        "\nYear:", year,
        "\nAverage IMDb Score:", round_score,
        "\nCount:", count
      )
    )
  ) +
    geom_point() +
    geom_line() +
    scale_x_continuous(breaks = seq(2021, 2023, by = 1)) +
    scale_color_manual(values = c("#128807", "#0A3161")) +
    theme_minimal() +
    theme(
      legend.position = "none",
      axis.text.x = element_blank(),
      axis.title.x = element_blank()
    ) +
    labs(x = "Year Streamed", y = "Average IMDb Score") +
    ylim(4, 9)
) %>%
  ggplotly(tooltip = "text")
# p13: the same measure per month; y-axis text and ticks are also blanked out
p13 <- (
  ggplot(
    netflix_top10_us_avg_imbd_country_month,
    aes(
      x = month,
      y = mean_imdb,
      color = production_countries,
      group = production_countries,
      text = paste(
        "Country:", production_countries,
        "\nMonth:", format(as.Date(month), "%B %Y"),
        "\nAverage IMDb Score:", round_score,
        "\nCount:", count
      )
    )
  ) +
    geom_point() +
    geom_line() +
    scale_x_date(date_labels = "%b %Y") +
    scale_color_manual(values = c("#128807", "#0A3161")) +
    theme_minimal() +
    theme(
      legend.position = "none",
      axis.text.x = element_blank(),
      axis.title.x = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank()
    ) +
    labs(x = "Month Streamed", y = "Average IMDb Score") +
    ylim(4, 9)
) %>%
  ggplotly(tooltip = "text")
# p14: mean IMDb score per year, one line per production country, y axis
# limited to 4-9. The x-axis text is kept on this plot.
p14 <- (
  ggplot(
    netflix_top10_in_avg_imbd_country,
    aes(
      x = year,
      y = mean_imdb,
      color = production_countries,
      group = production_countries,
      text = paste(
        "Country:", production_countries,
        "\nYear:", year,
        "\nAverage IMDb Score:", round_score,
        "\nCount:", count
      )
    )
  ) +
    geom_point() +
    geom_line() +
    scale_x_continuous(breaks = seq(2021, 2023, by = 1)) +
    scale_color_manual(values = c("#128807", "#0A3161")) +
    theme_minimal() +
    theme(legend.position = "none") +
    labs(x = "Year Streamed", y = "Average IMDb Score") +
    ylim(4, 9)
) %>%
  ggplotly(tooltip = "text")
# p15: the same measure per month; only the y-axis text and ticks are blanked
p15 <- (
  ggplot(
    netflix_top10_in_avg_imbd_country_month,
    aes(
      x = month,
      y = mean_imdb,
      color = production_countries,
      group = production_countries,
      text = paste(
        "Country:", production_countries,
        "\nMonth:", format(as.Date(month), "%B %Y"),
        "\nAverage IMDb Score:", round_score,
        "\nCount:", count
      )
    )
  ) +
    geom_point() +
    geom_line() +
    scale_x_date(date_labels = "%b %Y") +
    scale_color_manual(values = c("#128807", "#0A3161")) +
    theme_minimal() +
    theme(
      legend.position = "none",
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank()
    ) +
    labs(x = "Month Streamed", y = "Average IMDb Score") +
    ylim(4, 9)
) %>%
  ggplotly(tooltip = "text")
# Make subplots of these plots
# Six plots in three rows: each row pairs a yearly plot (left) with a monthly
# plot (right). Row labels sit to the right of the panels (x = 1.08 in paper
# coordinates); they differ only in height and text, so the common anchoring
# and font attributes are appended once.
subplot4 <- subplot(p10, p11, p12, p13, p14, p15, nrows = 3) %>%
  layout(
    title = paste0(
      "The Average IMDb Scores by Year are Slightly\n",
      "Higher for American Content in My Netflix Streaming History\n",
      "and in the Indian Top 10 Weekly Chart"
    ),
    font = list(size = 10),
    margin = list(l = 75, r = 100, b = 50, t = 100, pad = 4)
  ) %>%
  layout(
    annotations = lapply(
      list(
        list(x = 1.08, y = 0.8, text = "My Streamed\n Netflix Content"),
        list(
          x = 1.08, y = 0.45,
          text = "Netflix Weekly\n Top 10 Content- \nUnited States"
        ),
        list(
          x = 1.08, y = 0.1,
          text = "Netflix Weekly \nTop 10 Content- \nIndia"
        )
      ),
      function(row_label) {
        c(
          row_label,
          list(
            xref = "paper",
            yref = "paper",
            xanchor = "center",
            yanchor = "bottom",
            showarrow = FALSE,
            font = list(size = 10)
          )
        )
      }
    )
  ) %>%
  # X-axis title under the left (yearly) column
  add_annotations(
    text = "Year Streamed",
    x = 0.2,
    y = 0,
    xref = "paper",
    yref = "paper",
    xanchor = "center",
    yanchor = "bottom",
    yshift = -40,
    showarrow = FALSE,
    font = list(size = 14)
  ) %>%
  # X-axis title under the right (monthly) column
  add_annotations(
    text = "Month Streamed",
    x = 0.75,
    y = 0,
    xref = "paper",
    yref = "paper",
    xanchor = "center",
    yanchor = "bottom",
    yshift = -40,
    showarrow = FALSE,
    font = list(size = 14)
  ) %>%
  # Shared y-axis title, rotated and shifted left of the panels
  add_annotations(
    text = "Average IMDb Score",
    x = 0,
    y = 0.25,
    xref = "paper",
    yref = "paper",
    xanchor = "center",
    yanchor = "bottom",
    xshift = -45,
    showarrow = FALSE,
    font = list(size = 14),
    textangle = -90
  )
subplot4
# Spotify Data Cleaning ----
# Read in my Spotify data
# The export is split across four JSON files; each is read as a data frame
# (simplifyVector = TRUE) and the pieces are stacked row-wise.
streaming0 <- read_json("StreamingHistory0.json", simplifyVector = TRUE)
streaming1 <- read_json("StreamingHistory1.json", simplifyVector = TRUE)
streaming2 <- read_json("StreamingHistory2.json", simplifyVector = TRUE)
streaming3 <- read_json("StreamingHistory3.json", simplifyVector = TRUE)
streaming_df <- rbind(streaming0, streaming1, streaming2, streaming3)
# Clean Spotify data
streaming_df$date <- as.Date(streaming_df$endTime)
streaming_df$secPlayed <- streaming_df$msPlayed / 1000
# This is a local file so I had to input it manually
# Assigning into the column through a logical index is a no-op when nothing
# matches; `df[cond, ]$col <- value` raises an error in that case.
streaming_df$artistName[
  streaming_df$artistName %in% "Unknown Artist"
] <- "Sachin-Jigar"
streaming_df$trackName[
  streaming_df$trackName %in% "Unknown Track"
] <- "Apna Bana Le"
# Only consider these 8 weeks because they are the most recent weeks available
# filter() drops rows whose condition is NA instead of returning all-NA rows
streaming_recent <- streaming_df %>%
  filter(
    date >= as.Date("2023-02-10"),
    date <= as.Date("2023-04-06")
  )
# Only consider songs that I played for 30 seconds or more as plays (Spotify convention)
streaming_recent_plays <- streaming_recent %>%
  filter(secPlayed >= 30)
# Get the number of plays of each artist during this time frame
# (used below to break ties between artists with equal weekly play counts)
artist_stream_count <- streaming_recent_plays %>%
  group_by(artistName) %>%
  summarize(totalCount = n())
# Get my top artists per week
# Weeks start on Friday (week_start = 5); week() converts the Friday that opens
# each week into a week-of-year number.
#
# Ranking: within a week artists are ordered by weekly plays, then by total
# plays over the whole time frame, then by name. Ranks are the positions in
# that ordering, so ties of any size are broken deterministically. For the
# 2-way and 3-way ties with distinct totals this gives the same ranks as the
# earlier special-case logic (more total plays = better rank).
#
# The result keeps the columns week, artistName, count, rank, totalCount and
# stays grouped by week.
top6_artists_by_week <- streaming_recent_plays %>%
  group_by(
    week = week(floor_date(date, "week", week_start = 5)),
    artistName
  ) %>%
  summarize(count = n(), .groups = "drop_last") %>%
  # Keep every artist tied with the sixth-highest count, as top_n(6) did
  slice_max(count, n = 6, with_ties = TRUE) %>%
  left_join(artist_stream_count, by = "artistName") %>%
  arrange(desc(count), desc(totalCount), artistName, .by_group = TRUE) %>%
  # as.numeric() keeps `rank` a double, matching what rank() produced before
  mutate(rank = as.numeric(row_number())) %>%
  relocate(rank, .after = count)
# Spotify Plots ----
# Build a bump chart of weekly artist ranks.
#
# rank_data:     data frame with columns `week`, `rank` and `artistName`.
# artist_colors: named character vector, artist name -> colour. The names are
#                used as the colour-scale limits, so artists not listed here
#                fall outside the scale.
# title:         plot title.
# weeks:         week numbers shown on the x axis (breaks and limits).
# max_rank:      lowest rank shown; the y axis runs from max_rank up to 1.
#
# Returns a ggplot object.
artist_bump_chart <- function(rank_data, artist_colors, title,
                              weeks = 6:13, max_rank = 6) {
  ggplot(rank_data, aes(x = week, y = rank, color = artistName)) +
    geom_bump() +
    geom_point(size = 8) +
    scale_color_manual(
      values = unname(artist_colors),
      limits = names(artist_colors)
    ) +
    # Reversed axis so that rank 1 is drawn at the top
    scale_y_reverse(breaks = max_rank:1, limits = c(max_rank, 1)) +
    scale_x_continuous(breaks = weeks, limits = range(weeks)) +
    labs(color = "Artist", x = "Week of 2023", y = "Rank", title = title) +
    theme_classic() +
    # Shrink the legend keys; the plotted points themselves stay at size 8
    guides(colour = guide_legend(override.aes = list(size = 5))) +
    theme(plot.title = element_text(face = "bold", hjust = 0.5))
}

# Make Bump Chart of Ranks
p16 <- artist_bump_chart(
  top6_artists_by_week,
  c(
    "A.R. Rahman" = "#d24300",
    "Anirudh Ravichander" = "#ff671f",
    "Pritam" = "#ff9462",
    "Sid Sriram" = "#ffc2a5",
    "Taylor Swift" = "#0A3161"
  ),
  "While the Majority of my Top Artists are Indian, \nI listen to Taylor Swift Consistently"
)
# Read in top chart data that I manually entered after pulling the ranks from the Spotify Charts website
artists_usa <- read.csv("spotify_top_artists_usa.csv")
artists_india <- read.csv("spotify_top_artists_india.csv")
# Make Bump Chart of Ranks
p17 <- artist_bump_chart(
  artists_usa,
  c(
    "21 Savage" = "#B31942",
    "Drake" = "#c95e7a",
    "Morgan Wallen" = "#d98ca0",
    "SZA" = "#536e90",
    "Taylor Swift" = "#0A3161",
    "The Weeknd" = "#8498b0"
  ),
  "Morgan Wallen Shoots to the Top Artist \nin the United States Spotify Charts"
)
p18 <- artist_bump_chart(
  artists_india,
  c(
    "A.R. Rahman" = "#d24300",
    "Anirudh Ravichander" = "#ff671f",
    "Arijit Singh" = "#ffc2a5",
    "Pritam" = "#ff9462",
    "Shreya Ghoshal" = "#128807",
    "Vishal-Shekhar" = "#70b76a"
  ),
  "The Top Artists on Spotify in India \nStay Fairly Consistent Over Time"
)
p16

p17

p18
